{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercise with bank marketing data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Introduction\n",
    "\n",
    "- Data from the UCI Machine Learning Repository: [data](https://github.com/justmarkham/DAT8/blob/master/data/bank-additional.csv), [data dictionary](https://archive.ics.uci.edu/ml/datasets/Bank+Marketing)\n",
    "- **Goal:** Predict whether a customer will purchase a bank product marketed over the phone\n",
    "- `bank-additional.csv` is already in our repo, so there is no need to download the data from the UCI website"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 1: Read the data into Pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>job</th>\n",
       "      <th>marital</th>\n",
       "      <th>education</th>\n",
       "      <th>default</th>\n",
       "      <th>housing</th>\n",
       "      <th>loan</th>\n",
       "      <th>contact</th>\n",
       "      <th>month</th>\n",
       "      <th>day_of_week</th>\n",
       "      <th>...</th>\n",
       "      <th>campaign</th>\n",
       "      <th>pdays</th>\n",
       "      <th>previous</th>\n",
       "      <th>poutcome</th>\n",
       "      <th>emp.var.rate</th>\n",
       "      <th>cons.price.idx</th>\n",
       "      <th>cons.conf.idx</th>\n",
       "      <th>euribor3m</th>\n",
       "      <th>nr.employed</th>\n",
       "      <th>y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>30</td>\n",
       "      <td>blue-collar</td>\n",
       "      <td>married</td>\n",
       "      <td>basic.9y</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>cellular</td>\n",
       "      <td>may</td>\n",
       "      <td>fri</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>999</td>\n",
       "      <td>0</td>\n",
       "      <td>nonexistent</td>\n",
       "      <td>-1.8</td>\n",
       "      <td>92.893</td>\n",
       "      <td>-46.2</td>\n",
       "      <td>1.313</td>\n",
       "      <td>5099.1</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>39</td>\n",
       "      <td>services</td>\n",
       "      <td>single</td>\n",
       "      <td>high.school</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>telephone</td>\n",
       "      <td>may</td>\n",
       "      <td>fri</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>999</td>\n",
       "      <td>0</td>\n",
       "      <td>nonexistent</td>\n",
       "      <td>1.1</td>\n",
       "      <td>93.994</td>\n",
       "      <td>-36.4</td>\n",
       "      <td>4.855</td>\n",
       "      <td>5191.0</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>25</td>\n",
       "      <td>services</td>\n",
       "      <td>married</td>\n",
       "      <td>high.school</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>telephone</td>\n",
       "      <td>jun</td>\n",
       "      <td>wed</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>999</td>\n",
       "      <td>0</td>\n",
       "      <td>nonexistent</td>\n",
       "      <td>1.4</td>\n",
       "      <td>94.465</td>\n",
       "      <td>-41.8</td>\n",
       "      <td>4.962</td>\n",
       "      <td>5228.1</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>38</td>\n",
       "      <td>services</td>\n",
       "      <td>married</td>\n",
       "      <td>basic.9y</td>\n",
       "      <td>no</td>\n",
       "      <td>unknown</td>\n",
       "      <td>unknown</td>\n",
       "      <td>telephone</td>\n",
       "      <td>jun</td>\n",
       "      <td>fri</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>999</td>\n",
       "      <td>0</td>\n",
       "      <td>nonexistent</td>\n",
       "      <td>1.4</td>\n",
       "      <td>94.465</td>\n",
       "      <td>-41.8</td>\n",
       "      <td>4.959</td>\n",
       "      <td>5228.1</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>47</td>\n",
       "      <td>admin.</td>\n",
       "      <td>married</td>\n",
       "      <td>university.degree</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>cellular</td>\n",
       "      <td>nov</td>\n",
       "      <td>mon</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>999</td>\n",
       "      <td>0</td>\n",
       "      <td>nonexistent</td>\n",
       "      <td>-0.1</td>\n",
       "      <td>93.200</td>\n",
       "      <td>-42.0</td>\n",
       "      <td>4.191</td>\n",
       "      <td>5195.8</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   age          job  marital          education default  housing     loan  \\\n",
       "0   30  blue-collar  married           basic.9y      no      yes       no   \n",
       "1   39     services   single        high.school      no       no       no   \n",
       "2   25     services  married        high.school      no      yes       no   \n",
       "3   38     services  married           basic.9y      no  unknown  unknown   \n",
       "4   47       admin.  married  university.degree      no      yes       no   \n",
       "\n",
       "     contact month day_of_week ...  campaign  pdays  previous     poutcome  \\\n",
       "0   cellular   may         fri ...         2    999         0  nonexistent   \n",
       "1  telephone   may         fri ...         4    999         0  nonexistent   \n",
       "2  telephone   jun         wed ...         1    999         0  nonexistent   \n",
       "3  telephone   jun         fri ...         3    999         0  nonexistent   \n",
       "4   cellular   nov         mon ...         1    999         0  nonexistent   \n",
       "\n",
       "  emp.var.rate  cons.price.idx  cons.conf.idx  euribor3m  nr.employed   y  \n",
       "0         -1.8          92.893          -46.2      1.313       5099.1  no  \n",
       "1          1.1          93.994          -36.4      4.855       5191.0  no  \n",
       "2          1.4          94.465          -41.8      4.962       5228.1  no  \n",
       "3          1.4          94.465          -41.8      4.959       5228.1  no  \n",
       "4         -0.1          93.200          -42.0      4.191       5195.8  no  \n",
       "\n",
       "[5 rows x 21 columns]"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "url = 'https://raw.githubusercontent.com/justmarkham/DAT8/master/data/bank-additional.csv'\n",
    "bank = pd.read_csv(url, sep=';')\n",
    "bank.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 2: Prepare at least three features\n",
    "\n",
    "- Include both numeric and categorical features\n",
    "- Choose features that you think might be related to the response (based on intuition or exploration)\n",
    "- Think about how to handle missing values (encoded as \"unknown\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([u'age', u'job', u'marital', u'education', u'default', u'housing',\n",
       "       u'loan', u'contact', u'month', u'day_of_week', u'duration', u'campaign',\n",
       "       u'pdays', u'previous', u'poutcome', u'emp.var.rate', u'cons.price.idx',\n",
       "       u'cons.conf.idx', u'euribor3m', u'nr.employed', u'y'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# list all columns (for reference)\n",
    "bank.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### y (response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# convert the response to numeric values and store as a new column\n",
    "bank['outcome'] = bank.y.map({'no':0, 'yes':1})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### age"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0xc1c3160>"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEaCAYAAADqqhd6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHSFJREFUeJzt3XuUHOV55/HvT5LF3bQEtiRs8NjYCvHaMFzjGGwajC+w\nDpE3MQ6+oOEQnLMxNthxDsPJOhqSdRaRtVexiUkMC5r1JYAJxkviHKQQtULWFwxouBq0rGkCCZJA\n0tiADRjp2T/qHak1zEx3S91dXd2/zzkD9VZXdT3TU3q6+nnfelsRgZmZ9ZZZeQdgZmat5+RuZtaD\nnNzNzHqQk7uZWQ9ycjcz60FO7mZmPcjJvQ9J2iZpnaQxSXdJ+vUWP39Z0i11tjm51cftBElVSfOn\nWP9sB2M4StLpnTqeFZOTe3/6eUQcHRGDwCXAf8shhlOAt+/uzkpaGE+jprsxpJM3jBwNnNHB41kB\nObnbgcAW2JEw/1zSfZLulXRWWr9C0ufS8nslrU3brpT0V5J+JOlhSf9x8pNLmi/pZkn3SPq+pLdK\nGgB+D/h0+gRx0qR9XiVptaT7JV01cbUsaSAdZxS4Dzh0mnh3+eQg6QpJS9NyVdLytP0PJR1ec8wb\nJd2Rft6e1h8kadVELMC0byiSvpi2+0dJB0s6XNJdNY+/qbZds35Q0g/Sa3STpFJaX5F0bFo+WNKj\nkl4B/AnwofTafVDS/pKuTb/TPZI+kPY5O627T9JlNcd7VtLlKdbVkt6W/qb/T9JvpG1mp9f2jvSc\nH5/u97YuFRH+6bMf4CVgHfBjYBw4Oq3/LWAVWQJ7NfAYsADYB7if7Gr7IeD1afuVwHfT8huBx4G9\ngDJwS1r/ZeBzafkUYF1aXgZ8Zpr4rgAuTsvvBbYD84EBYBtwwgzxLqw9fk0M56TlR4FL0vLHauL8\nJnBiWj4MeDAtfwn4L2n5jIlYpoh5O3B2Wv4c8OW0/E/AUWn5z4BPTLHvvcA70vKlwP9Iy2uAY9Ly\nwcCjaXkp8KWa/ZcDX6xpl4BD0utxEDAbuA34zZpY35uWb0qv4WzgyJq/z8eBP0rLewE/AgbyPnf9\n0/iPr9z70y8iK8v8KvA+4Gtp/UnANyOzCVhLlkh/AZwPrCZLWo+m7QO4ASAiHgF+Ahwx6VgnTjx/\nRKwBDpJ0QHpsuqvgE4Hr0j63AltrHnssIu6o2W5yvMdTv0TyN+n/1wETdf/TgCskrQO+AxwgaT/g\nHcDXUyzfnRRLre3A9Wn562SvJcDVwLmSZgFnkb2J7CDpQODAiLg9rRoF3lknfrHra/cu4C8nGhEx\nTvY6rImIzRGxDfhGzfO+mF5XyD4BrUnb3E/2BgrwHuCc9Hr8gOzN9Y114rIuMifvACxfEfGD9JH/\nVWRJsTZpiJ2J8kjgKeA1dZ5y+xTrdqc2Pt0+z9XZLsg+mdReuOwzw3Emfj8BvxYRL+7y5FlZv9n4\na1+3m8g+pfwTcGdETPfmULvvhNrfY+8m9oOZ/5a/rFm/HXgRICK2S6rNCRdExOo6x7Uu5Sv3Pifp\nCLLz4GngdrJa7qyU7N8B3CHpdcBnyDryTpd0wsTuwAdT/f1w4A3Aw5MOcTvwkXSsMvBURDwDPAMc\nwNT+D9lVLpLeA8ybZrvJ8b4TuAP4V+DNkuam+vWpk/b7UM3/v5eWVwGfqnldjkqL/wx8OK07fYZY\nZgEfTMsfTrEREc8DtwJXAtdO3ikifgpsrel3+BhQSctV4Li0/Ns1u/2MXV+71cAnamIvkb0OJ6c+\ng9nA75B9smnUrcDvTyR7SYsl7dvE/pYzJ/f+tE/qjFtHVppYmkob3yar/95DVqP9w1TuuBr4g4jY\nAJwHXC1pL7IrwX8lSyTfBX4vXfkGO68SR4BjJd1DVnNemtbfAnwgxXHipPguBd4j6T6ypLaB7M2A\nmudlungj4nGyctH9ZKWSuyc9/7wUzy
eBT6d1nwKOS52HD5B1+E7E8k5J9wMfIKtjT+U54IQUc5ms\n03PCN8mukFdNs+9S4M9TTEfW7Pvfgf8s6W6y2vnE776G7M1rnaQPAv81/U73SRoDyulvNZy2HSP7\n1DDRyTy5bBVTLF8NPAjcnX6nK/En/UJRhKf8td0j6VqyDsmbWvy8c4FtEbFN2Vj4v4yIY1r03I8C\nx0bEllY8X4PH/CxwQEQs69QxzfxObN3oMOCG1An5Illnbqt09GpG0reB1/Py0pBZW/nK3cysB7nm\nbn1B0rCkRyT9TNIDkpak9bMlfUHSU5J+IukCSdvTpwYkHSjpf0r6d0lPSPrTicfMupnLMtYvHgFO\niogNyu5k/bqkNwJLyMb6HwX8HLiRXUs3K8k6dA8H9gf+juxmra92LnSz5rksY30pjRRaBlwIXBcR\nV6X17yIbWjgHeBXZ6JhSGtKIpLOB8yPCNXTrar5yt74g6RyyYY8DadX+ZLf0H0J2JT7hiZrl1wGv\nAJ7UzjnKZpEN/zTrak7u1vPSTVhfJRux8v2IiHTlLuBJ4NCazWuXHwdeAA6KiKnuvDXrWu4Ysn6w\nH1kd/WlglqRzgbekx24ALpR0SLqz8+K0LRHxJNmNR1+UdEC6E/ZwSfXmfjHLnZO79byIeBD4AvB9\nss7RtwD/QpbEryJL4PcCdwF/T3YD1cSV+jnAXLK7NbcA3yKbedKsq9XtUJV0IfC7ZB9hr4qIv1D2\nTTTXk9Ukq8BZaSY6s0JL88dcGREDecditidmvHKX9BayxH482VCx96cJooaB1RGxmGxOj+F2B2rW\nDpL2lnSGpDmSXkM2gqal0ymY5aFeWeYI4IcR8Xya73kt2RcknEk27zTp/0vaF6JZW4lscrMtZBOM\nPQD8cZ4BmbXCjGWZNB3sd8i+0OB54B+BO4GPRcS8tI2ALRNtMzPL34xDISPiIUnLyTqcniObOnTb\npG1Cku+EMjPrInXHuUfENcA1AJI+T3aTx0ZJC9Ot3IuATVPt66RvZtZeETHlN4XVTe6SXh0RmyQd\nBvwn4G1kU5guJfti3qXAzTMceLcCtumNjIwwMjKSdxhmDfM52x41d06/TCN3qN4o6SCy7138/Yj4\nqaTLyObbPo80FLIVgVpjqtW8IzCzbtdIWeZld+Olb7E5rS0RWV1jY9W8QzBrStVXJB3nO1QLaOHC\nwbxDMGvK4KDP2U5r65S/ksI199aoVLIfgEsvhWXp2zjL5ezHzPqPpN3vULXuMDmJu2/KzGbiskwB\nVauVvEMwa0pl4mOndYyTewG5fGlFc+ONeUfQf1xzN7O2K5d39hlZ68xUc/eVewH5H4mZ1ePkXkAr\nV1byDsGsrhUrdg4EWLu2smN5xYp84+oXHi1jZm1x0UXZD2T9RP7E2VlO7gVRO859dLTMwEC27HHu\nVgSlUjnvEPqOk3tBeJy7FdkSf51Px7nmXkAe525FMzhYyTuEvuPkXkAe525m9Xicu5lZQXmcu5nl\nysMfO8/JvYA8T4cVje/N6DwndzOzHuShkIVUzjsAs7pWrICb07cr33NPecdQ3iVLdt7cZO1Tt0NV\n0iXAR4HtwH3AucB+wPXA60jfoRoR41Ps6w7VNhgZ8Th3KxZPHNYeu92hKmkAOB84JiLeCswGfgcY\nBlZHxGLgttS2DvE4dyua8fFK3iH0nXplmZ8BvwT2lbQN2Bf4d+AS4OS0zShQwQm+rXadfgBPP2Bd\nR5ryAjL5LaRTpnzEn+7bo5GyzMeBLwC/AG6NiI9J2hoR89LjArZMtCft67JMG/gjrpnBnpVlDgcu\nAgaAQ4D9JX20dpuUvZ3Bzcy6SL2yzHHA9yJiM4Ckm4BfBzZIWhgRGyQtAjZN9wRDQ0MMpBpCqVRi\ncHCQcqojTIzXdrt+u1LZOVZ47VoYGSlTrVYYHISLLso/Prfdnqk9NFRhaIiuiaeo7bGxMcbHs7Er\n1W
qVmcxYlpF0FPAN4HjgeWAlcAfZKJnNEbFc0jBQioiX1dxdlmmPoaEKK1eW8w7DrGFShYhy3mH0\nnJnKMjNeuUfEPZL+F3An2VDIu4GvAgcAN0g6jzQUsqUR24w2bCjnHYJZk8p5B9B3PHFYAQ0OwthY\n3lGYNU4Cp4LW88RhPaeSdwBmTarkHUDf8fQDBbHrrdw7x7b7Vm4zm4rLMgU0MAB1OsrNuoqnzGiP\nmcoyTu4F5ORuZrAHo2Wse9ROP/DYYxVGRsqApx+wYqhUKjvGa1tn+Mq9gBYurHg4pBWKk3t7uCzT\nY1yWMTNwWaYn1I6Weewxj5Yxs5n5yr2ABgcrjI2V8w7DrGGeMqM9fBNTj3nqqbwjMGvO6GjeEfQf\nJ/cC2mefct4hmDWpnHcAfcfJvYBe+9q8IzCzbufkXhArVuwc0752bWXH8ooV+cZl1phK3gH0HXeo\nFtD++1d49tly3mGYNczzubeHO1R7zLZt5bxDMGvKsmXlvEPoO75yL4jace5r18LJJ2fLHudu1r98\nh2qPcVnGisbTD7SH71DtAbVX7s895ztUzWxmda/cJf0KcF3NqjcAnwO+DlxP9mXZVeCsiBiftK+v\n3Ntgzhx46aW8ozCzvLWsLCNpFvBvwAnAJ4GnI+JySRcD8yJieNL2Tu5tsPfe8PzzeUdhZnlr5WiZ\n04BHIuJx4Exg4qbiUWDJ7odo9VxwQTYb5MAAvPBCZcfyBRfkG5dZI4aGKnmH0HeavXK/BrgzIr4i\naWtEzEvrBWyZaNds7yv3Npg7t8KLL5bzDsOsYR7n3h4tKctImktWknlzRDxVm9zT41siYv6kfZzc\n28BlGSsaCZwKWq9Vo2VOB+6KiIk5CTdKWhgRGyQtAjZNtdPQ0BADAwMAlEolBgcHdwyJqqTvjXO7\nfnvFCli5Mmu/8EKZchnGxyucdBJccUX+8bntttvtb4+NjTE+no1bqdb5xp5mrtyvA/4hIkZT+3Jg\nc0QslzQMlNyh2hmez92KxmWZ9tjjDlVJ+5F1pt5Us/oy4N2S1gOnpraZmXWBhsoyEfEccPCkdVvI\nEr61SdZPPZULkU6Zdj9/WrJu47llOs/TD5iZFZRnhewxEx0tZkXhc7bznNzNzHqQyzJmZgXlskyP\nGRnJOwIz63ZO7gV06aWVvEMwa4rnluk8J3cza7vR0frbWGu55l5AnqfDisbnbHu45m5m1mec3Aup\nkncAZk2q5B1A33FyL6ClS/OOwMy6nWvuZtZ2IyMewtsOLfsO1d04sJO7mVmbuEO1x3ieDisan7Od\n5+RuZtaDXJYxMysol2V6jDumzKweJ/cC8twyVjSeW6bzGv0O1ZKkGyX9WNKDkn5N0nxJqyWtl7RK\nUqndwZpZMXlumc5rqOYuaRRYGxHXSJoD7Af8EfB0RFwu6WJgXkQMT9rPNfc28DwdVjQ+Z9tjj8a5\nSzoQWBcRb5i0/iHg5IjYKGkhUImIIyZt4+TeBv6HYkXjc7Y99rRD9fXAU5KulXS3pKsk7QcsiIiN\naZuNwIIWxWt1VfIOwKxJlbwD6DuNJPc5wDHAVyLiGOA5YJfyS7o89/tyh3huGTOrZ04D2zwBPBER\nP0rtG4FLgA2SFkbEBkmLgE1T7Tw0NMTAwAAApVKJwcFByuUysPOuNbeba69c2V3xuO12vfayZeWu\niqeo7bGxMcbHxwGoVqvMpNEO1X8Gfjci1ksaAfZND22OiOWShoGSO1TNzDqnFTcxfRL4hqR7gCOB\nzwOXAe+WtB44NbWtAybe0c2Kwuds5zVSliEi7gGOn+Kh01objpmZtYLnljEzKyjPLdNjPLeMmdXj\n5F5AnlvGisZzy3Sek7uZtZ3nluk819wLyLdyW9H4nG0P19zNzPqMk3shVfIOwKxJlbwD6DtO7gXk\nuWXMrB7X3M2s7UZGPIS3HfZoPvc9PLCTu5lZm7hDtcd4ng4rGp+z
nefkbmbWg1yWMTMrKJdleow7\npsysHif3AvLcMlY0nlum85zczaztPLdM57nmXkCep8OKxudse7jmbmbWZxpK7pKqku6VtE7SHWnd\nfEmrJa2XtEpSqb2h2k6VvAMwa1Il7wD6TqNX7gGUI+LoiDghrRsGVkfEYuC21LYO8NwyZlZPQzV3\nSY8Cx0XE5pp1DwEnR8RGSQuBSkQcMWk/19zNesz8+bB1a/uPM28ebNnS/uMU2R7PLSPpJ8BPgW3A\nX0fEVZK2RsS89LiALRPtmv2c3M16TKc6R90JW18rOlRPjIijgdOBT0h6R+2DKYP7z9AhnqfDisbn\nbOfNaWSjiHgy/f8pSd8GTgA2SloYERskLQI2TbXv0NAQAwMDAJRKJQYHBymXy8DOP7jbzbUndEs8\nbvdXGzpzPKhQqeT/+3ZTe2xsjPHxcQCq1SozqVuWkbQvMDsinpG0H7AKuBQ4DdgcEcslDQOliBie\ntK/LMmY9xmWZ7rGnZZkFwO2SxoAfAn8XEauAy4B3S1oPnJra1gGeW8bM6vEdqgUkVYgo5x2G9and\nuaKuVCo15Zb2Haff+A5VM7M+4yv3AvIVjeXJNffu4St3M7M+4+ReSJW8AzBrys5hlNYpTu45mz8/\n+/jZzA80v8/8+fn+nmbWWa6558z1Sysan7PdwzV3M7M+4+ReQK5fWtH4nO08J3czsx7kmnvOXL+0\novE52z1cczcz6zNO7gXk+qUVjc/ZznNyNzPrQa6558z1Sysan7PdwzV3M7M+4+ReQK5fWtH4nO08\nJ3czsx7kmnvOXL+0ovE52z32uOYuabakdZJuSe35klZLWi9plaRSKwM2M7M902hZ5kLgQWDifXQY\nWB0Ri4HbUts6xPVLKxqfs51XN7lLei1wBnA1MHH5fyYwmpZHgSVtic7MzHZL3Zq7pG8Bfwa8Evhs\nRPyGpK0RMS89LmDLRHvSvq651+H6pRWNz9nusds1d0nvBzZFxDp2XrXvImVv/wnMzLrInDqPvx04\nU9IZwN7AKyV9DdgoaWFEbJC0CNg03RMMDQ0xMDAAQKlUYnBwkHK5DOysw/V7G5rdPlvXLfG73V/t\nZs/X2nO1meNBhUol/9+3m9pjY2OMj48DUK1WmUnDQyElnczOsszlwOaIWC5pGChFxMs6VV2WqW93\nPnpWKpWafwDtO47ZVHzOdo+ZyjLNJvc/iIgzJc0HbgAOA6rAWRExPsU+Tu51uH5pReNztnu0JLnv\n5oGd3OvwPxQrHE2ZS9rDJ+2MPHFYj6mtX5p1mogs6TbxU1mzpul95HEae6Reh6q1WaBpxiG1+jg7\n/2tmvc9lmZy5LGNF43O2e7gsY2bWZ5zcC8g1dysan7Od5+RuZtaDXHPPmeuXVjQ+Z7uHa+5mZn3G\nyb2AXL+0ovE523lO7mZmPcg195y5fmlF43O2e7jmbmbWZ5zcC8j1Sysan7Od5+RuZtaDXHPPmeuX\nVjQ+Z7uHa+5mZn3Gyb2AXL+0ovE523mez70LdOKLbebNa/8xzKx7zFhzl7Q3sBbYC5gLfCciLknf\noXo98Dr8Haod51qk5ck19+6x2zX3iHgeOCUiBoEjgVMknQQMA6sjYjFwW2qbmVmXqFtzj4ifp8W5\nwGxgK3AmMJrWjwJL2hKdTaOSdwDW56RmfypN7+NS4p6pW3OXNAu4GzgcuDIiHpC0ICI2pk02Agva\nGKOZdZHdKZW4xNJ5dZN7RGwHBiUdCNwq6ZRJj4ck/9k6qpx3AGZNKucdQN9peLRMRPxU0t8DxwIb\nJS2MiA2SFgGbpttvaGiIgYEBAEqlEoODg5TLZWDn8Ci3m2svW9Zd8bjtttudaY+NjTE+no1dqVar\nzKTeaJmDgZciYlzSPsCtwKXAe4HNEbFc0jBQioiXdap6tEx7VCqVHX9wsyKQKkSU8w6j58w0Wqbe\nlfsiYDTV3WcBX4uI2yStA26Q
dB5pKGQrAzYzsz3juWXMrO1GRrIfa62Zrtyd3M3MCsoTh/WYiY4W\ns6LwOdt5Tu4FtHJl3hGYWbdzWaaAfEOImYHLMmZmfcfJvZAqeQdg1pShoUreIfQdJ3cza7vR0frb\nWGu55l5Arrlb0ficbQ/X3HvMsmV5R2Bm3c7JvYDK5UreIZg1qZJ3AH3Hyd3MrAc5uReQZ4S0opmY\npto6xx2qZmYF5Q7VHuN5OqxofM52npN7AXluGTOrx2WZAvKYYTMDl2XMzPqOk3shVfIOwKwpnlum\n8+omd0mHSloj6QFJ90v6VFo/X9JqSeslrZJUan+4ZlZEnlum8+rW3CUtBBZGxJik/YG7gCXAucDT\nEXG5pIuBeRExPGlf19zbwDV3Kxqfs+2xRzX3iNgQEWNp+Vngx8BrgDOBiffjUbKEbx3guWXMrJ6m\nau6SBoCjgR8CCyJiY3poI7CgpZHZtDy3jBVPJe8A+k7DyT2VZP4WuDAinql9LNVe/KHLzKxLzGlk\nI0mvIEvsX4uIm9PqjZIWRsQGSYuATVPtOzQ0xMDAAAClUonBwcEdc6NM3LXmtttu93Z72bJyV8VT\n1PbY2Bjj4+MAVKtVZtJIh6rIauqbI+LTNesvT+uWSxoGSu5QNTPrnD29ielE4KPAKZLWpZ/3AZcB\n75a0Hjg1ta0DJt7RzYrC52zn1S3LRMS/MP2bwGmtDccasXIleNZfM5uJ55YpII8ZNjOYuSzTUIeq\n5SPr7pjusen38xuq5WGm83UmPl/bw3PLdLGImPJnzZo10z7mfyiWl5nOyZnOWWsPJ3czsx7kmruZ\nWUF5Pnczsz7j5F5AHjNsReNztvOc3M3MepBr7mZmBeWau5lZn3FyLyDXL61ofM52npO7mVkPcs3d\nzKygXHM3M+szTu4F5PqlFY3P2c5zcjcz60GuuZuZFZRr7mZmfaZucpd0jaSNku6rWTdf0mpJ6yWt\nklRqb5hWy/VLKxqfs53XyJX7tcD7Jq0bBlZHxGLgttS2DhkbG8s7BLOm+JztvLrJPSJuB7ZOWn0m\nMJqWR4ElLY7LZjA+Pp53CGZN8Tnbebtbc18QERvT8kZgQYviMTOzFtjjDtU0HMZDYjqoWq3mHYJZ\nU3zOdl5DQyElDQC3RMRbU/shoBwRGyQtAtZExBFT7Oekb2bWRtMNhZyzm8/3v4GlwPL0/5ubOaiZ\nmbVX3St3SX8DnAwcTFZf/2PgO8ANwGFAFTgrItxjYmbWJdp6h6qZmeXDd6gWjKT3SXpI0v+VdHHe\n8ZjNZKqbIK0znNwLRNJs4Aqym8reDJwt6VfzjcpsRlPdBGkd4OReLCcAj0RENSJ+CVwH/GbOMZlN\na5qbIK0DnNyL5TXA4zXtJ9I6M7NdOLkXi3u/zawhTu7F8m/AoTXtQ8mu3s3MduHkXix3Am+SNCBp\nLvAhshvKzMx24eReIBHxEnABcCvwIHB9RPw436jMppdugvwesFjS45LOzTumfuGbmMzMepCv3M3M\nepCTu5lZD3JyNzPrQU7uZmY9yMndzKwHObmbmfUgJ3frC5KWpq+ENOsLTu7WL4aAQ/IOwqxTfBOT\nFZakzwATdzxeTfb1j7Vf5P5ZYD/gfmAl2dw8PwfeDrwVWJEefwE4FdgGXAkcC7wEfCYiKpKGgCXA\nvsCbgC8AewMfTvueERFbJR1ONt/+q9Jxzo+Ih9v3CphNz1fuVkiSjiW7Gj8BeBtwPlCatFkAERF/\nSzYvz4cj4hhgO9lc+J+KiEHgXcDzwCeAbRFxJHA2MCppr/Rc/wH4AHA88HngZ+m5vg+ck7b5KvDJ\niDgO+EPgK63+vc0aNSfvAMx200nATRHxCwBJNwHvnGI7TbH8K8CTEXEXQEQ8m57jROBLad3Dkh4D\nFpO9SayJiOeA5ySNA7ek57oPOFLSfmSfCL4l7Tjk3Fb8oma7w8ndiip4eeI+kF0/je7DrnPgN1
KD\n1DTrX6hZ3l7T3k7272gWsDUijm7gGGZt57KMFdXtwBJJ+6Sr5iXAPwCvljQ/lVPeX7P9M8Ar0/LD\nwCJJxwFIOiB9P+3twEfSusXAYcBDTJ/wmXgsIp4BHpX022l/STqyNb+qWfOc3K2QImIdWSfpHcAP\ngKsi4k7gT9K6VWTTIk9YCfyVpLvJzvsPAV+WNEY2hfJeZDXyWZLuJavJL03fVRtM/wmg9rGPAOel\n57wfOLNVv69ZszxaxsysB/nK3cysBzm5m5n1ICd3M7Me5ORuZtaDnNzNzHqQk7uZWQ9ycjcz60FO\n7mZmPej/Az06t9SM9b3RAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0xc1bfbe0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# probably not a great feature\n",
    "bank.boxplot(column='age', by='outcome')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### job"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "job\n",
       "admin.           0.131423\n",
       "blue-collar      0.069005\n",
       "entrepreneur     0.054054\n",
       "housemaid        0.100000\n",
       "management       0.092593\n",
       "retired          0.228916\n",
       "self-employed    0.081761\n",
       "services         0.089059\n",
       "student          0.231707\n",
       "technician       0.115774\n",
       "unemployed       0.171171\n",
       "unknown          0.102564\n",
       "Name: outcome, dtype: float64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# looks like a useful feature\n",
    "bank.groupby('job').outcome.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# create job_dummies (we will add it to the bank DataFrame later)\n",
    "job_dummies = pd.get_dummies(bank.job, prefix='job')\n",
    "job_dummies.drop(job_dummies.columns[0], axis=1, inplace=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### default"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "default\n",
       "no         0.121267\n",
       "unknown    0.061021\n",
       "yes        0.000000\n",
       "Name: outcome, dtype: float64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# looks like a useful feature\n",
    "bank.groupby('default').outcome.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "no         3315\n",
       "unknown     803\n",
       "yes           1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# but only one person in the dataset has a status of yes\n",
    "bank.default.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# so, let's treat this as a 2-class feature rather than a 3-class feature\n",
    "bank['default'] = bank.default.map({'no':0, 'unknown':1, 'yes':1})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### contact"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "contact\n",
       "cellular     0.141403\n",
       "telephone    0.051806\n",
       "Name: outcome, dtype: float64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# looks like a useful feature\n",
    "bank.groupby('contact').outcome.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# convert the feature to numeric values\n",
    "bank['contact'] = bank.contact.map({'cellular':0, 'telephone':1})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### month"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "month\n",
       "apr    0.167442\n",
       "aug    0.100629\n",
       "dec    0.545455\n",
       "jul    0.082982\n",
       "jun    0.128302\n",
       "mar    0.583333\n",
       "may    0.065312\n",
       "nov    0.096413\n",
       "oct    0.362319\n",
       "sep    0.406250\n",
       "Name: outcome, dtype: float64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# looks like a useful feature at first glance\n",
    "bank.groupby('month').outcome.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>month</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>dec</th>\n",
       "      <td>22</td>\n",
       "      <td>0.545455</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mar</th>\n",
       "      <td>48</td>\n",
       "      <td>0.583333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sep</th>\n",
       "      <td>64</td>\n",
       "      <td>0.406250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>oct</th>\n",
       "      <td>69</td>\n",
       "      <td>0.362319</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>apr</th>\n",
       "      <td>215</td>\n",
       "      <td>0.167442</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nov</th>\n",
       "      <td>446</td>\n",
       "      <td>0.096413</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>jun</th>\n",
       "      <td>530</td>\n",
       "      <td>0.128302</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>aug</th>\n",
       "      <td>636</td>\n",
       "      <td>0.100629</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>jul</th>\n",
       "      <td>711</td>\n",
       "      <td>0.082982</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>may</th>\n",
       "      <td>1378</td>\n",
       "      <td>0.065312</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       count      mean\n",
       "month                 \n",
       "dec       22  0.545455\n",
       "mar       48  0.583333\n",
       "sep       64  0.406250\n",
       "oct       69  0.362319\n",
       "apr      215  0.167442\n",
       "nov      446  0.096413\n",
       "jun      530  0.128302\n",
       "aug      636  0.100629\n",
       "jul      711  0.082982\n",
       "may     1378  0.065312"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# but, it looks like their success rate is actually just correlated with number of calls\n",
    "# thus, the month feature is unlikely to generalize\n",
    "bank.groupby('month').outcome.agg(['count', 'mean']).sort('count')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### duration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0xc32e438>"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAEaCAYAAAAR0SDgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X+cXHV97/HXO0EkFcoSsYQf0bUmXEkf4GIkWEEZbC/G\nHxXorVCwlW251l7EINY+mnivN4n31operSIVH7dYE6yg1B9Y5EcSNJPSHxB/sBiIINwyaFYSELII\nKpUkn/vH+U5ysuzuzOzuzJnZ834+HvPY7/nOOXM+MzmZz3y/33O+RxGBmZnZrKIDMDOz7uCEYGZm\ngBOCmZklTghmZgY4IZiZWeKEYGZmgBOCNUnSbkl3ShqS9B1JvznNr1+RdEODdU6b7v12gqSapLlj\n1D/VwRheJun1ndqf9SYnBGvWzyPixIgYAFYAf1VADKcDr5rsxkqmMZ5mjXexTycvAjoReEMH92c9\nyAnBJuNQ4HHY+yX7EUlbJH1P0jmp/uOS3p/Kr5O0Ka27RtKnJX1L0n2S3jj6xSXNlXS9pLsk/Zuk\n4yX1A+8ALk0tlVNHbfMCSRsk3S3pb+u/yiX1p/2sBbYA88eJd78WiqQrJF2QyjVJl6X175D0ktw+\nvyRpc3q8KtU/X9L6eizAuElI0sfSerdKOlzSSyR9J/f8wvxyrn5A0u3pM/qKpL5UX5W0OJUPl/Sg\npOcAHwDOTZ/dWyQdLOmz6T3dJenstM15qW6LpA/l9veUpA+nWDdIemX6N/1/kn4nrTM7fbab02v+\nyXjv27pURPjhR8MHsAu4E/g+MAKcmOr/C7Ce7Evv14CHgCOAOcDdZL/q7wVenNZfA9yUyguAHwHP\nBSrADan+k8D7U/l04M5UXgm8Z5z4rgD+IpVfB+wB5gL9wG5gyQTxzsvvPxfD21L5QWBFKv9hLs5r\ngFNS+YXA1lS+HPgfqfyGeixjxLwHOC+V3w98MpW/CbwslT8IvHOMbb8HvDqVVwN/ncobgZen8uHA\ng6l8AXB5bvvLgI/llvuAo9Ln8XxgNvAN4MxcrK9L5a+kz3A2cELu3+dPgP+eys8FvgX0F33s+tH8\nwy0Ea9YvIusyOg5YCnwu1Z8KXBOZR4BNZF++vwDeDmwg+6J7MK0fwHUAEfEA8O/AS0ft65T660fE\nRuD5kg5Jz433a/sU4Atpm3XAztxzD0XE5tx6o+M9icbdN9emv18A6uMYvw1cIelO4GvAIZKeB7wa\n+PsUy02jYsnbA3wxlf+e7LMEuAr4I0mzgHPIEs9ekg4FDo2I21LVWuA1DeIX+392vwX8TX0hIkbI\nPoeNEfFYROwGPp973V+mzxWyltbGtM7dZEkX4AzgbenzuJ0sIS9oEJd1kQOKDsB6T0TcnrojXkD2\nRZr/ohH7vlxPAB4Fjm7wknvGqJtMX/942/yswXpB1gLK/0CaM8F+6u9PwMkR8cv9Xjwbpmg1/vzn\n9hWy1tA3gW9HxHgJJb9tXf59HNTCdjDxv+Uzufo9wC8BImKPpPz3yMURsaHBfq1LuYVgLZP0UrJj\n5yfAbWR907NSgng1sFnSi4D3kA1mvl7SkvrmwFvSeMJLgF8H7hu1i9uAt6Z9VYBHI+JJ4EngEMb2\nL2S/ppF0BnDYOOuNjvc1wGbgh8AiSQem/vjXjtru3Nzff03l9cCy3OfyslT8J+D8VPf6CWKZBbwl\nlc9PsRERTwPrgCuBz47eKCKeAHbmxlH+EKimcg14RSr/Xm6zn7L/Z7cBeGcu9j6yz+G0NAYyG/h9\nshZUs9YBF9UThKRjJf1KC9tbwZwQrFlz0oDknWTdJhekbpevkvVn30XW5/znqSvmKuDPImI7cCFw\nlaTnkv3i/CHZl89NwDvSL+xg36/RVcBiSXeR9aFfkOpvAM5OcZwyKr7VwBmStpB9EW4nSyDkXpfx\n4o2IH5F1Zd1N1o3z3VGvf1iK513Apa
luGfCKNIB6D9mgdz2W10i6GzibrF9+LD8DlqSYK2QDv3XX\nkP0SXz/OthcAH0kxnZDb9v8A/03Sd8nGAurvfSNZwrtT0luA/53e0xZJQ0Al/VstT+sOkbVO6gPt\no7vUYozyVcBW4LvpPV2JeyF6iiI8/bV1jqTPkg3KfmWaX/dAYHdE7FZ2rcLfRMTLx1l3DfCjiHh/\nk6/9ILA4Ih6fRFxvJRucfl2L270XOCQiVra6T7PJcva2meKFwHVpIPaXZAPa48m3RprR1LrKTo39\nd+CAiNgDEBGfJxucbZqkrwIv5tndVmZt5YRgHRURf9Sm130AGLNFMI6mB30j4tcBJM2qf9FP12uP\ns7+zp7K92WR5DMFmPEknSvqupJ9K+gLp7BtJg5JuG7XuHkn1BLBG0pWSblI2zURF0htTP/wTkn4o\nKd+l80/p70ja1ytH70PSq5RdlDeSLuD6zdxzVUkfkPTPaft1kp7frs/FbDQnBJvR0tjC9WTn6h8G\n/APZxWnNdhudB/yviDiY7Eymp4A/iIhDgTeSDeCemdZ9dfp7aET8akTcPiqWucCNwMfJztH/GHCj\npPxZSOcBg2QXzR0IvLelN2w2BU4INtO9kqxP/xMRsTsivkx2BW2z3TrXR8S/AUTEf0TEpoi4Jy1v\nITvj6rS0bqPXfCNwX0R8PiL2RMQXyK7ifnN6PoDPRsQD6dTT64CBJuM0mzInBJvpjgKGR9WNdxro\naEE2tcZekk6WtFHSI5JGyE41bbZb5yiyU25Hx3JUbnl7rvwL4OAmX9tsypwQbKZ7mGdfKf2i9Pdn\nwN4LpyTNa+L1riHrgjomIvqAT7Pv/1GjLqjh3L7zsYxOWGaFcEKwme5fgV2Slkl6jqTfZd/cRXcB\nv6HsXgEHkV0QlzdWF9DBwM6I+GW6+vp89iWCR8kuJnvJOLHcDByrbEbRAySdSzaP09cb7NOsI5wQ\nbEaLiGeA3yUbqH2MbHqLL6fn7ie7wvdWsukzbuPZV+CO/tV/EfABST8lm6H0i3tXjvg58JfAv0h6\nXNLJ+deIiMeANwF/Rjbtx3uBN4264K3R/s3apqkrldO8Jt8GtkXE76SzJb5I1tytAeek2RKRtAL4\nY7Iph5dFxPpUv5hs6uODyKY/vmTa342ZmU1asy2ES8jmKKlnj+XAhog4lmw+mOUAkhaRTf61iGyK\n5E9Je+9QdSVwYUQsBBZKWjo9b8HMzKZDw4Qg6Riym3xcxb7+zTeTnddN+ntWKp8JXBsRz0REDXgA\nOFnSkWTzstTnpL86t42ZmXWBZloIfw38OfvPWX9EROxI5R1kd8iC7PS5bbn1tpGd4TG6fpjGc+Sb\nmVkHTZgQJL0JeCQi7mScsx8iG4TwwJeZWY9rNLndq4A3S3oD2WDwr0r6HLBD0ryI2J66gx5J6w8D\n83PbH0PWMhhO5Xz9mOdeS3JyMTNro4gY8wd+0/dDkHQa8N50ltGHgcci4jJJy4G+iFieBpWvAZaQ\ndQndCiyIiJB0B9kNRTaTzedyeUTcMsZ+wvdomH6rVq1i1apVRYdh1jQfs+0hadyE0Or01/Vv6g+R\nzT1/Iem0U4CI2CrpOrIzknYBF+W+3S8iO+10Dtlpp89KBmZmVpymE0JEbCLdXzVdSPPb46z3QbLb\nHo6u/w5w/OTCtKmq1WpFh2DWEh+znecrlUtiYMCTZlpv8THbeV13T2WPIZiZtc9EYwhuIZiZGeCE\nUBrVarXoEMxa4mO285wQzMwM8BiCmVmpeAzBzMwackIoCffHWq/xMdt5TghmZgZ4DMHMrFQ8hmBm\nZg05IZSE+2Ot1/iY7TwnBDPrSkNDRUdQPk4IJVGpVIoOwawlIyOVokMoHScEMzMDWr9BjvWoarXq\nVoJ1vWo1ewCsXl0FKgBUKtnD2ssJwcy6Rv6Lv1YD30Gzs9xlVBJuHViv6e+vFB1C6UyYECQdJOkO\nSU
OStkr6q1S/StI2SXemx+tz26yQdL+keyWdkatfLGlLeu4T7XtLZjYT+DdM502YECLiaeD0iBgA\nTgBOl3QqEMDHIuLE9LgZQNIi4FxgEbAU+JSk+hVxVwIXRsRCYKGkpe15SzYWn9NtvadadACl07DL\nKCJ+nooHArOBnWl5rEufzwSujYhnIqIGPACcLOlI4JCI2JzWuxo4ayqBm5nZ9GqYECTNkjQE7AA2\nRsQ96al3SbpL0mck9aW6o4Btuc23AUePUT+c6q1DPIZgvcbHbOc100LYk7qMjgFeI6lC1v3zYmAA\neBj4aDuDNDOz9mv6tNOIeELSjcArIqJar5d0FXBDWhwG5uc2O4asZTCcyvn64fH2NTg4SH9/PwB9\nfX0MDAzs/bVQ7wv3cmvL9bpuicfLXm60PPrYLTqeXl0eGhpiZGQEgFqtxkQmnP5a0uHArogYkTQH\nWAesBu6JiO1pnUuBkyLi/DSofA2whKxL6FZgQUSEpDuAZcBm4Ebg8oi4ZYx9evrrNqj6wjTrMT5m\n22Oi6a8bJYTjgbVkXUuzgM9FxEckXU3WXRTAg8A7ImJH2uZ9wB8Du4BLImJdql8MrAHmADdFxLJx\n9umEYGbWJpNOCEVwQjAzax/fIMf264816wU+ZjvPCcHMzAB3GZmZlYq7jMzMrCEnhJJwf6z1Gh+z\nneeEYGZmgBNCiVSKDsCsJb4orfOcEErCrW8za8QJoSRqtWrRIZi1xGMIned7Ks9g+RuWr10Lab5A\nfMNyMxuLr0MoiVWrfMNyM/N1CGZm1gQnhJLo66sWHYJZSzyG0HlOCCUxMFB0BGbW7TyGYGZWIh5D\nMDOzhpwQSsL9sdZrfMx2nhOCmZkBje+pfBCwCXgucCDwtYhYIWku8EXgRUANOCciRtI2K8juqbwb\nWBYR61N9/Z7KB5HdU/mScfbpMQQzszaZ9BhCRDwNnB4RA8AJwOmSTgWWAxsi4ljgG2kZSYuAc4FF\nwFLgU5LqO74SuDAiFgILJS2d+lszM7Pp0rDLKCJ+nooHArOBncCbgbWpfi1wViqfCVwbEc9ERA14\nADhZ0pHAIRGxOa13dW4b6wD3x1qv8THbeQ0TgqRZkoaAHcDGiLgHOCIidqRVdgBHpPJRwLbc5tuA\no8eoH071ZmbWJRpObhcRe4ABSYcC6ySdPur5kORO/y7nueWt1/iY7bymZzuNiCck3QgsBnZImhcR\n21N30CNptWFgfm6zY8haBsOpnK8fHm9fg4OD9KepOfv6+hgYGNh7cNSbkV72spe97OXGy0NDQ4yM\njABQq9WYSKOzjA4HdkXEiKQ5wDpgNfA64LGIuEzScqAvIpanQeVrgCVkXUK3AgtSK+IOYBmwGbgR\nuDwibhljnz7LqA2q1ereg8SsF/iYbY+JzjJq1EI4ElgraRbZeMPnIuIbku4ErpN0Iem0U4CI2Crp\nOmArsAu4KPftfhHZaadzyE47fVYyMDOz4nguIzOzEvFcRmZm1pATQknUB5vMeoWP2c5zQjAzM8Bj\nCGZmpeIxBDMza8gJoSTcH2u9xsds5zkhmJkZ4DEEM7NS8RiCmZk15IRQEu6PtV7jY7bznBDMzAzw\nGIKZWal4DMHMeo57jDrPCaEk3B9rvWbNmmrRIZSOE4KZmQEt3ELTepvvPGW9oFrd11W0dm2FdCdd\nKpXsYe3lhGBmXWP0F/+qVQUFUlLuMioJjyFYr6nVqkWHUDoNE4Kk+ZI2SrpH0t2SlqX6VZK2Sboz\nPV6f22aFpPsl3SvpjFz9Yklb0nOfaM9bMrOZYGCg6AjKp+F1CJLmAfMiYkjSwcB3gLOAc4AnI+Jj\no9ZfBFwDnAQcDdwKLIyIkLQZuDgiNku6Cbg8Im4Ztb2vQzAza5MpXYcQEdsjYiiVnwK+T/ZFDzDW\ni54JXBsRz0REDXgAOFnSkcAhEbE5rXc1WWIxM7Mu0NIYgqR+4ETg
9lT1Lkl3SfqMpL5UdxSwLbfZ\nNrIEMrp+mH2JxdrMYwjWa3zMdl7TCSF1F30JuCS1FK4EXgwMAA8DH21LhGZm1hFNnXYq6TnAl4G/\nj4jrASLikdzzVwE3pMVhYH5u82PIWgbDqZyvHx5rf4ODg/SnE5D7+voYGBjYex59/VeDl73s5Zm9\nXKlUuiqeXl0eGhpiZGQEgFqtxkSaGVQWsBZ4LCIuzdUfGREPp/KlwEkRcX5uUHkJ+waVF6RB5TuA\nZcBm4EY8qGxm1lFTndzuFOAPgNNHnWJ6maTvSboLOA24FCAitgLXAVuBm4GLct/wFwFXAfcDD4xO\nBtY+9V8OZr3Cx2znNewyioh/ZuzEcfME23wQ+OAY9d8Bjm8lQDMz6wzfD8HMrER8PwQzM2vICaEk\n3B9rvcbHbOc5IZiZGeCEUBr185LNekel6ABKxwnBzLqSe4w6zwmhJNwfa73G90PoPN8xzcy6xv63\n0MS30OwwX4dgZl1p1SrfQrMdfB2CmZk15IRQEh5DsF7T11ctOoTScUIws67keyp3nscQzMxKxGMI\nZmbWkBNCSXgMwXqNj9nOc0IwMzPAYwhmZqXiMQQzM2uoYUKQNF/SRkn3SLpb0rJUP1fSBkk/kLRe\nUl9umxWS7pd0r6QzcvWLJW1Jz32iPW/JxuL+WOs1PmY7r5kWwjPApRHxG8ArgXdKOg5YDmyIiGOB\nb6RlJC0CzgUWAUuBT0mqN0+uBC6MiIXAQklLp/XdmJnZpLU8hiDpeuCK9DgtInZImgdUI+KlklYA\neyLisrT+LcAq4CHgmxFxXKr/faASEX866vU9hmBm1ibTNoYgqR84EbgDOCIidqSndgBHpPJRwLbc\nZtuAo8eoH071ZmbP4h6jzms6IUg6GPgycElEPJl/Lv2k98/6Lub+WOs1a9ZUiw6hdJq6H4Kk55Al\ng89FxPWpeoekeRGxXdKRwCOpfhiYn9v8GLKWwXAq5+uHx9rf4OAg/Wki9L6+PgYGBvbeArL+xebl\n1pbruiUeL3vZy51ZHhoaYmRkBIBarcZEGo4hpAHhtcBjEXFprv7Dqe4yScuBvohYngaVrwGWkHUJ\n3QosiIiQdAewDNgM3AhcHhG3jNqfxxDMSip/g5zVq2HlyqzsG+RMn4nGEJpJCKcC/wR8j33dQivI\nvtSvA14I1IBzImIkbfM+4I+BXWRdTOtS/WJgDTAHuCkilo2xPycEM/MNctpkSgmh05wQ2qNare5t\nRpr1gsHBKmvWVIoOY8bxlcpm1nMOPrjoCMrHCaE0KkUHYNaSp56qFB1C6TghlITPOrVe0+CEGGuD\npk47td53++1V3Eqwbpc/y2jTpiqrVlUAn2XUKU4IM1j+P9e6dfvO2PB/LjMbi88yKolKxd1G1lv6\n+91t1A4TnWXkFsIMtn/z2y0E6375Y/ahh3zMdppbCCWxdGmVW26pFB2GWdN8zLaHr0Mws54zb17R\nEZSPE0JJzJtXKToEs5YMDlaKDqF0nBBKIk0ea9YzPGbQeR5UnsH2nzmySv06BA/QWS/w/Fud54Qw\ng+W/+Gs1zxxpZhNzl1FJ9PdXig7BrEWVogMoHSeEknDL23rNmjVFR1A+TgglMTRULToEs5b4mO08\njyGUxNBQ0RGYNZY/EeKuu3ylcqe5hVAalaIDMGtRpegASqeZeyr/HfBG4JGIOD7VrQL+K/BoWu19\nEXFzem4F2f2UdwPLImJ9qq/fT/kgsvspXzLO/jx1xTTxDcutl3lCxvaY0j2VJb0aeAq4OpcQVgJP\nRsTHRq27CLgGOAk4GrgVWBgRIWkzcHFEbJZ0E3B5RNwyxv6cENpgYKDK0FCl6DDMmua5jNpjSrOd\nRsRtkvrHet0x6s4Ero2IZ4CapAeAkyU9BBwSEZvTelcDZwHPSgg2fdwfa71s6dKiIyifqQwqv0vS\n24BvA38WESPAUcDtuXW2kbUU
nknluuFUb22U/+K//faKL0yznvLud1eKDqF0JjuofCXwYmAAeBj4\n6LRFZG3x9NNFR2Bm3W5SLYSIeKRelnQVcENaHAbm51Y9hqxlMJzK+frh8V5/cHCQ/jQbW19fHwMD\nA3vnNKmmPhAvt7a8fTtApWvi8bKXGy3Xy90ST68uDw0NMTIyAkCtwS3omrpBThpDuCE3qHxkRDyc\nypcCJ0XE+blB5SXsG1RekAaV7wCWAZuBG/GgctuNntxu5coK4DEE6w1VT27XFlM9y+ha4DTgcGAH\nsJLsBOEBIIAHgXdExI60/vvITjvdBVwSEetSff200zlkp50uG2d/Tght4PvTWjeSxvxeasjfEZM3\npYTQaU4I0+fjH4frr8/KmzbBaadl5bPOgne/u7i4zJohgb8Kpp8TgjF3bpXHH68UHYZZ0wYHq6xZ\nUyk6jBnH91Q2s54zOFh0BOXjFsIM5qkrzGw0dxkZnhfGzGCKU1dY78q3EDZtqrJqVQVwC8F6g087\n7Ty3EErCg8rWa5wQ2sODysaBB1aKDsGsJdVqpegQSscthBnM1yFYL/N1CO3hQWVjzpwqv/hFpegw\nzJomVYmoFB3GjONB5ZLKDyo//bTvh2BmE/MYQmlUig7ArEWVogMoHScEMzMDnBBKpFp0AGYtueCC\natEhlI4Tgpl1Jc9l1Hk+y2gGO/ts2LgxKz/xBBx6aFY+/XT46leLi8vMiuOzjErqkkvgZS/LyqtX\n77v2wGcYmdlY3GU0gw0N5U89re4tDw0VGZVZc6qejbHj3EKYwQYGIN1bm02b9rUMBgYKC8nMuljD\nFoKkv5O0Q9KWXN1cSRsk/UDSekl9uedWSLpf0r2SzsjVL5a0JT33iel/Kzba/i2EilsI1lM8l1Hn\nNdNl9Flg6ai65cCGiDgW+EZaRtIi4FxgUdrmU9p3F+0rgQsjYiGwUNLo17RpNjCw/1XJ9bJbCNYL\nVq8uOoLyaeosI0n9wA0RcXxavhc4LSJ2SJoHVCPipZJWAHsi4rK03i3AKuAh4JsRcVyq/32gEhF/\nOsa+fJZRG3heGOs1Pmbbox3TXx8RETtSeQdwRCofBWzLrbcNOHqM+uFUb2ZmXWLKZxmln/P+Sd+F\nLr4Y+vuzB1T2li++uMiozJpVKTqA0pnsWUY7JM2LiO2SjgQeSfXDwPzceseQtQyGUzlfPzzeiw8O\nDtKffYvR19fHwMDA3jsn1U9F83Lj5aEh+PGPs2WosH077NlTZdOmbLno+LzsZS+3f3loaIiRdLph\nrVZjIpMdQ/gw8FhEXCZpOdAXEcvToPI1wBKyLqFbgQUREZLuAJYBm4Ebgcsj4pYx9uUxhGly8cXw\n9a9n5YceqvKiF1UAeNOb4IoriovLrBmDg1XWrKkUHcaMM6UrlSVdC5wGHC7pR8D/BD4EXCfpQqAG\nnAMQEVslXQdsBXYBF+W+3S8C1gBzgJvGSgY2vRYsqHcXwUMP7SsvWFBURGbN81xGnee5jErCtyM0\nM/BcRqWVv2Ma+I5pZjYxz2VUGtWiAzBrSTX/a8Y6wi2EGexLX9o3qAywZk329yc/cQvBzJ7NLYTS\nqBQdgFlLPJdR53lQeQbLjyGsXg0rV2ZljyFYL/CJEO0x0aCyE8IMdvzx8P3vZ+Xdu6vMnl0B4Ljj\nYMuW8bcz6waey6g9fJZRST36KOzevW+5Xn700WLiMbPu5oQwg82ZkzW7ASIqe8tz5hQXk1nzKkUH\nUDpOCDPYE0/s3wdbLz/xRDHxmFl381lGM9jRR8Ps2dkDqnvLR3viceuwuXOz1morD6i2vM3cuUW/\n097mFsIMduGFcP31WXnTJjj11Kx81lnFxWTltHNn62cMVautnw2nMYdKrVk+y2gGO+CA/QeV62bP\nhl27Oh+PlVenTiH1qaqNteOOadYDjjsu32W0r3zcccXGZWbdyQlhBhsezloIWSuhurc8PO6tic
y6\nh+cy6jwnBDMzAzyGMKPNmjV2f6oEe/Z0Ph4rL48hdA+PIZiZWUNOCDPY/r+UquPUm3UnjyF03pQS\ngqSapO9JulPS5lQ3V9IGST+QtF5SX279FZLul3SvpDOmGryZmU2fKY0hSHoQWBwRj+fqPgz8JCI+\nLOkvgMMiYrmkRcA1wEnA0cCtwLERsWfUa3oMYZoccgg89dSz6w8+GJ58svPxWHl5DKF7tHsMYfQL\nvxlYm8prgfp1sWcC10bEMxFRAx4AlkzD/m0cYyWDierNrNymmhACuFXStyW9PdUdERE7UnkHcEQq\nHwVsy227jaylYNNE0n6PLFfXHxv3W95/PbPu4zGEzpvqXEanRMTDkl4AbJB0b/7JiAhJEzXg3Lib\nRhN1tWU3G/HHbWbjm1JCiIiH099HJX2VrAtoh6R5EbFd0pHAI2n1YWB+bvNjUt2zDA4O0t/fD0Bf\nXx8DAwNU0ixX9V8NXm5tuT63fLfE4+VyLU/m+KtUKi3vD6pkk+J11/svcnloaIiRkREAarUaE5n0\noLKkXwFmR8STkp4HrAdWA78NPBYRl0laDvSNGlRewr5B5QWjR5A9qGw283hQuXu0a1D5COA2SUPA\nHcDXI2I98CHgP0v6AfDatExEbAWuA7YCNwMX+Zu/c/b9UjPrvKDFGxtIVFu9GYKU7ccmbdJdRhHx\nIDAwRv3jZK2Esbb5IPDBye7TzHqTiNZ/uU/ihgiSByanwnMZmVnbucuoe3guIzMza8gJoSQGB6tF\nh2DWEo97dZ4TQkmsXdt4HTMrN48hlIT7Vq1IHkPoHh5DMDOzhpwQSqNadABmLfEYQuc5IZiZGeAx\nhJ40dy7s3Nn+/Rx2GDz+eOP1zBrxGEL3mGgMYaqznVoBdu7s3H8us+nSiePpsMPav4+ZzF1GJeH+\nWCtSROsPqLa8jVu0U+OEYGZmgMcQepL7Y60MfPy1h69DMDOzhpwQepDnlrdyqBYdQOk4IfQgMYkR\nuo0bW95GnlneCnTBBUVHUD4eQ+hBHkMws8nyGIKZmTXU8YQgaamkeyXdL+kvOr3/maL1IYFqy9v4\nIh8rkq+d6byOXqksaTZwBdk9l4eBb0n6x4j4fifj6HWT6caRhoioTHssZlOhSV6+7G7l9uh0C2EJ\n8EBE1CLiGeALwJkdjqGkRooOwOxZImLcx8qVK8d9ztqj0wnhaOBHueVtqc7MzArW6YTg1F6YWtEB\nmLWkVqsVHULpdHq202Fgfm55PlkrYT+T7Ve0iUm+sbL1lrW+GXhHdfQ6BEkHAPcBvwX8GNgMnOdB\nZTOz4nVRnq93AAAC/UlEQVS0hRARuyRdDKwDZgOfcTIwM+sOXXelspmZFcNXKs9wvhDQeo2kv5O0\nQ9KWomMpGyeEGSx3IeBSYBFwnqTjio3KrKHPkh2z1mFOCDObLwS0nhMRtwE7i46jjJwQZjZfCGhm\nTXNCmNl8xoCZNc0JYWZr6kJAMzNwQpjpvg0slNQv6UDgXOAfC47JzLqUE8IMFhG7gPqFgFuBL/pC\nQOt2kq4F/hU4VtKPJP1R0TGVhS9MMzMzwC0EMzNLnBDMzAxwQjAzs8QJwczMACcEMzNLnBDMzAxw\nQjAbl6QLJB1ZdBxmneKEYDa+QeCoooMw6xRfmGalIuk9QP3K16uArwE3RMTx6fn3As8D7gbWkM0H\n9XPgVcDxwMfT8/8BvBbYDVwJLAZ2Ae+JiKqkQeAs4FeAhcBHgYOA89O2b4iInZJeQnbPihek/bw9\nIu5r3ydgNj63EKw0JC0m+9W/BHgl8Hagb9RqAUREfJlsLqjzI+LlwB6y+0ksi4gB4LeAp4F3Arsj\n4gTgPGCtpOem1/oN4GzgJOAvgZ+m1/o34G1pnf8LvCsiXgH8OfCp6X7fZs06oOgAzDroVOArEfEL\nAElfAV4zxnoao/yfgIcj4jsAEfFUeo1TgMtT3X2SHgKOJU
ssGyPiZ8DPJI0AN6TX2gKcIOl5ZC2P\nf5D27vLA6XijZpPhhGBlEjz7y/5Q9m8pz2H/+0g006eqcer/I1fek1veQ/Z/bxawMyJObGIfZm3n\nLiMrk9uAsyTNSb/OzwJuBn5N0tzU1fOm3PpPAr+ayvcBR0p6BYCkQ9I9q28D3prqjgVeCNzL+EmC\n+nMR8STwoKTfS9tL0gnT81bNWueEYKUREXeSDRRvBm4H/jYivg18INWtJ5smvG4N8GlJ3yX7v3Iu\n8ElJQ2RTij+XrM9/lqTvkY0xXJDuXx2M39LIP/dW4ML0mncDb56u92vWKp9lZGZmgFsIZmaWOCGY\nmRnghGBmZokTgpmZAU4IZmaWOCGYmRnghGBmZokTgpmZAfD/AVptyBX+YxsRAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0xc545f28>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# duration looks like an excellent feature, but the length of a call is only known after it has taken place, so it can't be used in your model\n",
    "bank.boxplot(by='outcome', column='duration')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### previous"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "previous\n",
       "0    0.082884\n",
       "1    0.208421\n",
       "2    0.410256\n",
       "3    0.600000\n",
       "4    0.714286\n",
       "5    1.000000\n",
       "6    0.500000\n",
       "Name: outcome, dtype: float64"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# mean outcome for each value of previous: looks like a useful feature\n",
    "bank.groupby('previous')['outcome'].mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### poutcome"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "poutcome\n",
       "failure        0.147577\n",
       "nonexistent    0.082884\n",
       "success        0.647887\n",
       "Name: outcome, dtype: float64"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# mean outcome for each poutcome category: looks like a useful feature\n",
    "bank.groupby('poutcome')['outcome'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# create poutcome_dummies, leaving out the first dummy column (it serves as the baseline level)\n",
    "poutcome_dummies = pd.get_dummies(bank.poutcome, prefix='poutcome').iloc[:, 1:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# concatenate bank DataFrame with job_dummies and poutcome_dummies\n",
    "# (dummy columns left in bank by an earlier run of this cell are dropped first,\n",
    "# so re-running the cell does not append the same columns a second time)\n",
    "dummy_cols = list(job_dummies.columns) + list(poutcome_dummies.columns)\n",
    "bank = pd.concat([bank.loc[:, ~bank.columns.isin(dummy_cols)], job_dummies, poutcome_dummies], axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### euribor3m"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0xc553668>"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXEAAAEaCAYAAADntGc9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAF5JJREFUeJzt3XmYZXV95/H3p2lExKVBTZSIljGSaCIWiowRlzLJo+i4\nzmgIOEo7GZMncYk6LvGZ8QmaZKJxRhMlMU8kARO3ZBTIMOMCMV0GxwWXLgRRJomUouPCSKO4C/2d\nP84puJRVfW91V517zu3363kKzn5+995ff+vcz1kqVYUkaZi2TbsBkqT9ZxGXpAGziEvSgFnEJWnA\nLOKSNGAWcUkaMIv4DEtyY5LdSZaSfCLJz2/y9heSXDBmmUds9n67kGQ5yVFrTP9Wh224f5LHdLU/\nDZNFfLZ9p6qOr6p54GXAH0yhDY8EHrK/K6e1ie2Z1Ho3UHR5Y8XxwGM73J8GyCJ+8LgDcC3cVBhf\nk+SyJJ9K8svt9D9K8vJ2+NFJPtAue06SP0vysSRXJvnXqzee5Kgk5ye5NMmHk9wvyRzw68AL2m8E\nD121zp2TXJTk8iRvWjn6TTLX7ufNwGXAMeu09xbfBJKcmeT0dng5yavb5T+a5F4j+3xnkkvan4e0\n0++Y5MKVtgDr/uJI8tp2ub9Pcqck90ryiZH59x4dH5k+n+Qj7Xt0bpId7fTFJA9sh++U5KokhwKv\nBE5p37unJrltkrPb13Rpkie365zaTrssyatG9vetJH/YtvWiJA9uP9N/SfL4dplD2vf2knabv7be\n61ZPVZU/M/oD3ADsBj4DXAcc307/t8CFNIXqx4DPAz8OHA5cTnP0/Fngnu3y5wDvbod/CrgaOAxY\nAC5op78BeHk7/Ehgdzv8O8AL12nfmcBL2+FHA3uBo4A54EbgxH209y6j+x9pwzPa4auAl7XDTx9p\n59uAk9rhuwNXtMOvB/5zO/zYlbas0ea9wKnt8MuBN7TD/wDcvx3+L8Cz11j3U8DD2uFXAK9rh3cB\nD2iH7wRc1Q6fDrx+ZP1XA68dGd8BHN2+H3cEDgHeDzxxpK2PbofPbd/DQ4DjRj6fXwP+Uzt8GPAx\nYG7afdefyX88Ep9t360mTrkPcDLw1+30hwJvq8bXgA/QFMzvAs8CLqIpTle1yxfwtwBV9c/A54Cf\nWbWvk1a2X1W7gDsmuV07b72j2pOAd7TrvA/YMzLv81V1ychyq9v7IMZHG29v//8OYCWX/yXgzCS7\ngb8DbpfkCOBhwFvatrx7VVtG7QX+ph1+C817CXAW8Mwk24BfpvllcZMkdwDuUFUXt5PeDDx8TPvD\nLd+7XwT+ZGWkqq6jeR92VdXXq+pG4K0j2/1B+75C841mV7vM5TS/KAEeBTyjfT8+QvNL9KfGtEs9\nsn3aDVA3quoj7Vf1O9MUv9HiEG4uiMcB1wA/MWaTe9eYtj/Z9XrrfHvMckXzTWP0QOTwfexn5fUF\n+FdV9YNbbLyJ3Tfa/tH37Vyabx3/AHy8qtb7JTC67orR13HrDawH+/4sfzgyfS/wA4Cq2ptk9N/+\nc6rqojH7VU95JH6QSPIzNJ/3/wMupslat7VF/WHAJUnuAbyQ5oTaY5KcuLI68NQ2H78X8JPAlat2\ncTHwtHZfC8A1VXU9cD1wO9b2v2mOWknyKODIdZZb3d6HA5cAXwDum+RWbb78C6vWO2Xk/x9qhy8E\nnjfyvty/HfxH4LR22mP20ZZtwFPb4dPatlFV3wPeB7wROHv1SlX1DWDPyHmBpwOL7fAycEI7/JSR\n1b7JLd+7i4Bnj7R9B8378Ig20z8E+BWabyqTeh/wmytFPcmxSW6zgfU1ZRbx2XZ4e1JsN02kcHob\nSZxHk89eSpOhvriNKc4C/mNVfQX4VeCsJIfRHNl9gaZgvBv49fZItrj5qO8M4IFJLqXJhE9vp18A\nPLltx0mr2vcK4FFJLqMpXl+hKfqMbJ
f12ltVV9PEPJfTRByfXLX9I9v2PBd4QTvtecAJ7Um8T9Oc\neF1py8OTXA48mSZnXsu3gRPbNi/QnHxc8TaaI94L11n3dOA1bZuOG1n3vwK/keSTNNn2ymvfRfNL\naneSpwK/176my5IsAQvtZ/Xb7bJLNN8CVk72ro6bao3hs4ArgE+2r+mN+A19UFLlo2i1b0nOpjkx\neO4mb/dWwI1VdWOaa8n/pKoecADbuxz4zar6xyR7gAur6pRx622WJC8CbldVv9PVPiV/42qa7g78\nbXsy8Ac0J1X3W1X93OjogWxrtSQvAJ4D3Bn4Ls03kue1kRFJzgPuyY9GOtKW8khcg5dke1XdsGra\nGcC9qurpm7D9bTRXc+ypqj1JjgTeCXysqn77QLcvHQgzcU1VkqOTvCvJ15J8Lslz2+nnJPndkeUW\nklw9Mr6c5CVJPgVc3960spxk5Ui4gFsneUeSb6Z57MBxI+vfp73JZk97M8zjR+adk+SNSd6d5jb7\nhar63MgVJ9tosu8vj6yzN8lvJPmndn+vTHMT0IeTXNe249AteAt1kLOIa2raI9wLaG5IOprmOujn\nt1eqTPIV8VeAxwA72uufR9cJ8ESaE59H0px0PL8t9oe2+30vTTzyXOCtSY4dWf9U4Her6rY0V9GQ\n5LQk36C5BPOaqvrjVe15FM2VPQ8GXgq8qd3O3YH7tcPSprKIa5oeBNypqn6vqm5oby46i6Y4j1M0\ndzN+qaq+v84yH6+qc9sC/1qaa7B/nqbIHlFVr2r3uwv4n9yyyJ5fVR8GWNl+Vb2tqu4AHAvcp83J\nR/1hVX2rqq6gubnmPVW1XFXfBN5DU+ClTWUR1zTdAzi6jTT2tFeUvIzm1vpJXD1m/hdXBqo5+fNF\nmiP+u66x7ufbedD8glh32+1dq68CnrFq1ldHhr+7xvhtx7RX2jCvTtE0fYHmOSHHrp6R5Exg9KaT\nu6yx/rjI5ZiR7W0D7gZ8iSZqOSZJ6uYz+/egeV7MpA4FvrOB5aUt4ZG4pukSmpOSL0lyeJtX/1yS\nE2huXHlskiOT3AV4/n5s/4FJntzejfh84Hs0zwe5hKYAvyTJoe0dpo+jfY4La9x+n+Q/tHeLkuS+\nNDfYvGvM/lffDi9tOou4pqaq9tIUz3mah2pdA/w5cHuah2ldSnNL+ntpCuxGroct4HyaW+6vpXkk\nwL+pqhvbu00fT3NS9Bqapyk+var+z8i6q/f1EOCyJNcD5wF/Bbxu1f7WasPosNfzatONvU68fT7D\nWcDP0nTCf19VH+mgbZKkMSbJxP+Y5lnST2m/lh6xxW2SJE1on0fiaZ6BvLuqfrK7JkmSJjUuE78n\ncE2aPwn1yTR/QsvHVEpST4wr4tuBBwB/2j5d7ts0Z+UlST0wLhP/IvDFqvpYO/5OVhXxJJ5xl6Qt\nVlVrXqa6zyJeVV9JcnWSY9vLr34J+PQay21OK3ULZ5xxBmeccca0myFNzD67NZL1bzOY5OqUlYcD\n3Qr4F+CZm9QuSdIBGlvEq+pSmgcVqWPLy8vTboK0IfbZ7nnHZo/Nz89PuwnShthnu3fAf9nnls8Q\nkiRttiTrntj0SFySBswi3mOLi4vTboK0IfbZ7lnEJWnAzMQlqefMxCVpRlnEe8x8UUNjn+2eRVyS\nBsxMXJJ6zkxckmaURbzHzBc1NPbZ7lnEJWnAzMQlqefMxCVpRlnEe8x8UUNjn+2eRVySBsxMXJJ6\nzkxckmaURbzHzBc1NPbZ7lnEJWnAzMQlqefMxCVpRlnEe8x8UUNjn+2eRVySBsxMXJJ6zkxckmbU\nREU8yXKSTyXZneSSrW6UGuaLGhr7bPe2T7hcAQtVde1WNkaStDETZeJJrgJOqKqvrzHPTFySttBm\nZOIF/H2Sjyd51uY1TUn260eSYPIiflJVHQ88Bnh2kodtYZsOKlW17g/s2sc8qX/MxLs3USZeVV9u\n/3
9NkvOAE4GLV+bv3LmTubk5AHbs2MH8/DwLCwvAzR+q4447PvvjS0tLvWrPUMdXhpeXlxlnbCae\n5DbAIVV1fZIjgAuBV1TVhe18M/EJHHUU7Nmz9fs58ki41tPP2kL7G+dZJ/bfvjLxSY7Efxw4r/3g\ntgNvXSngmtyePdBFHzYu11bbVzFOuunnupl3bHZkfzr34uLiTV+ztnI/0mZJFqlamHYzZo53bErS\njPJIvCtd5hx+HtoEnsfpj30diVvEO9JVzGGcos1in+0P45SBGr3cSBoC+2z3LOKSNGDGKR3xq6mG\nxj7bH8YpkjSjLOI9Zr6oobHPds8iLkkDZibeEfNFDY19tj/MxCVpRlnEe8x8UUNjn+2eRVySBsxM\nvCPmixoa+2x/mIlL0oyyiPeY+aKGxj7bPYu4JA2YmXhHzBc1NPbZ/jATl6QZZRHvMfNFDY19tnsW\ncUkaMDPxjpgvamjss/1hJi5JM8oi3mPmixoa+2z3LOKSNGBm4h0xX9TQ2Gf7w0xckmbUREU8ySFJ\ndie5YKsbpJuZL2po7LPdm/RI/LeAKwC/9EhSj4zNxJPcDTgH+H3ghVX1+FXzzcQnYL6oobHP9seB\nZuKvA14M7N3UVkmSDtg+i3iSxwFfq6rdwJq/BbR1zBc1NPbZ7m0fM/8hwBOSPBa4NXD7JH9VVc8Y\nXWjnzp3Mzc0BsGPHDubn51lYWABu/lAP9nHoZn+wyOLi9F+v4wfn+NLS0n6t39W/j6GMrwwvLy8z\nzsTXiSd5BPAiM/H9Y76oobHP9sdmXifuWy1JPTJxEa+qD1TVE7ayMbql0a9W0hDYZ7vnHZuSNGA+\nO6Uj5osaGvtsf/jsFEmaURbxHjNf1NDYZ7tnEZekATMT74j5oobGPtsfZuKSNKMs4j1mvqihsc92\nzyIuSQNmJt4R80UNjX22P8zEJWlGWcR7zHxRQ2Of7Z5FXJIGzEy8I+aLGhr7bH+YiUvSjLKI95j5\noobGPts9i7gkDZiZeEfMFzU09tn+MBOXpBllEe8x80UNjX22exZxSRowM/GOmC9qaOyz/WEmLkkz\nyiLeY+aLGhr7bPcs4pI0YGbiHTFf1NDYZ/vDTFySZtTYIp7k1kk+mmQpyRVJ/qCLhsl8UcNjn+3e\n9nELVNX3kjyyqr6TZDvwwSQPraoPdtA+SdI+bCgTT3Ib4APA6VV1RTvNTHwC5osaGvtsfxxwJp5k\nW5Il4KvArpUCLkmaromKeFXtrap54G7Aw5MsbGmrBJgvanjss90bm4mPqqpvJPlfwAnA4sr0nTt3\nMjc3B8COHTuYn59nYWEBuPlDPdjHoZv9wSKLi9N/vY4fnONLS0v7tX5X/z6GMr4yvLy8zDhjM/Ek\ndwJuqKrrkhwOvA94RVW9v51vJj4B80UNjX22P/aViU9yJH5X4M1JttHEL3+9UsAlSdM1NhOvqsuq\n6gFVNV9Vx1XVa7pomMwXNTz22e55x6YkDZjPTumI+aKGxj7bHz47RZJmlEW8x8wXNTT22e5ZxCVp\nwMzEO2K+qKGxz/aHmbgkzSiLeI+ZL2po7LPds4hL0oCZiXfEfFFDY5/tDzNxSZpRFvEeM1/U0Nhn\nu2cRl6QBMxPviPmihsY+2x9m4pI0oyziPWa+qKGxz3bPIi5JA2Ym3hHzRQ2NfbY/zMQlaUZZxHvM\nfFFDY5/tnkVckgbMTLwj5osaGvtsf5iJS9KMsoj3mPmihsY+2z2LuCQNmJl4R8wXNTT22f4wE5ek\nGTW2iCc5JsmuJJ9OcnmS53XRMJkvanjss93bPsEyPwReUFVLSW4LfCLJRVX1mS1umyRpjA1n4knO\nB95QVe9vx83EJ2C+qKGxz/bHpmXiSeaA44GPHnizJEkHauIi3kYp7wR+q6q+tXVN0grzRQ2NfbZ7\nk2TiJDkUeBfwlqo6f/X8nTt3Mjc3B8COHTuYn59nYWEBuPlDPdjH
oZv9wSKLi9N/vY4fnONLS0v7\ntX5X/z6GMr4yvLy8zDhjM/EkAd4MfL2qXrDGfDPxCZgvamjss/1xoJn4ScC/Ax6ZZHf7c/KmtlCS\ntF/GFvGq+mBVbauq+ao6vv15bxeNO9iNfrWShsA+2z3v2JSkAfPZKR0xX9TQ2Gf7w2enSNKMsoj3\nmPmihsY+2z2LuCQNmJl4R8wXNTT22f4wE5ekGWUR7zHzRQ2NfbZ7FnFJGjAz8Y6YL2po7LP9YSYu\nSTPKIt5j5osaGvts9yzikjRgZuIdMV/U0Nhn+8NMXJJmlEW8x8wXNTT22e5ZxCVpwMzEO5I106zN\nd+SRcO213exLs81MvD/2lYlP9NfudeD2p5PauSWNY5zSa4vTboC0IWbi3bOIS9KAmYn3mHGKpslM\nvD/MxCVtWBHo4IR8jfxXG2ec0mOnn7447SboIBaqOUTewM/irl0bXicW8ANiEe+xnTun3QJJfWcm\nLmlNZuL94bNTJGlGjS3iSf4yyVeTXNZFg3Qzr7nV0NhnuzfJkfjZwMlb3RBJ0saNLeJVdTGwp4O2\naJXFxYVpN0HakIWFhWk34aAz0YnNJHPABVV1vzXmeWJzi3jCR9Pkic3+8MTmYC1OuwHShpiJd29T\n7tjcuXMnc3NzAOzYsYP5+fmbvlatfKiOO+747I8vLS3t1/rQj/b3ZXxleHl5mXGMU3rMr5maJuOU\n/jigOCXJ24EPAccmuTrJMze7gZKk/TPJ1SmnVtXRVXVYVR1TVWd30TD57BQNz2gcoG54YrPHfHaK\npHF8doqkNfl3YfvD54lL2jD/LuwwGKf0mPmihmdx2g046FjEJWnALOI95rNTNDwL027AQccTmz1m\nvqihsc9uDZ+dMliL026AtCHe29A9i7ikTeO9Dd3zEsMpy5iLcdebbYSlaRnXZ9djn90aFvEps2Nr\naOyz/WKc0mNeJ66hsc92zyIuSQPmJYaS1HNeYihJM8oi3mPmixoa+2z3LOKSNGBm4pLUc2bikjSj\nLOI9Zr6oobHPds8iLkkDZiYuST1nJi5JM8oi3mPmixoa+2z3LOKSNGBm4pLUc2bikjSjxhbxJCcn\n+WySf0ry0i4apYb5oobGPtu9fRbxJIcAZwInA/cFTk1yny4aJlhaWpp2E6QNsc92b9yR+InAP1fV\nclX9EHgH8MStb5YArrvuumk3QdoQ+2z3xhXxnwCuHhn/YjtNktQD44q4l51M0fLy8rSbIG2IfbZ7\n+7zEMMmDgTOq6uR2/GXA3qp69cgyFnpJ2mLrXWI4rohvB64EfhH4v8AlwKlV9ZmtaKQkaWO272tm\nVd2Q5DnA+4BDgL+wgEtSfxzwHZuSpOnxjs0e8gYrDU2Sv0zy1SSXTbstBxuLeM94g5UG6myaPquO\nWcT7xxusNDhVdTGwZ9rtOBhZxPvHG6wkTcwi3j+eaZY0MYt4/3wJOGZk/Biao3FJ+hEW8f75OHDv\nJHNJbgWcAvyPKbdJUk9ZxHumqm4AVm6wugL4G2+wUt8leTvwIeDYJFcneea023Sw8GYfSRowj8Ql\nacAs4pI0YBZxSRowi7gkDZhFXJIGzCIuSQNmEddMSXJ6krtOux1SVyzimjU7gaOn3QipK97so95L\n8kJg5Q7As4C/Ay6oqvu1818EHAFcDpxD8/yZ7wAPAe4H/FE7//vALwA3Am8EHgjcALywqhaT7ASe\nBNwGuDfw34BbA6e16z62qvYkuRfNM9/v3O7nWVV15da9A9L6PBJXryV5IM3R9YnAg4FnATtWLVZA\nVdW7aJ49c1pVPQDYS/M89udV1TzNH/z+HvBs4MaqOg44FXhzksPabf0s8GTgQcDvA99st/Vh4Bnt\nMn8OPLeqTgBeDPzpZr9uaVL7/EPJUg88FDi3qr4LkORc4OFrLJc1hn8a+HJVfQKgqr7VbuMk4PXt\ntCuTfB44luaXwa6q+jbw7STX
ARe027oMOC7JETRH+P89uWmXt9qMFyrtD4u4+q740QJ9B275LfJw\nbvkc9kkywqwz/fsjw3tHxvfS/HvZBuypquMn2Ie05YxT1HcXA09Kcnh7FPwk4D3AjyU5qo1BHjey\n/PXA7dvhK4G7JjkBIMnt2r9hejHwtHbascDdgc+yfmFnZV5VXQ9cleQp7fpJctzmvFRp4yzi6rWq\n2k1zsvIS4CPAm6rq48Ar22kX0jyyd8U5wJ8l+SRN/z4FeEOSJZrH+x5Gk2FvS/Ipmsz89PbvmRbr\nH9GPznsa8KvtNi8HnrBZr1faKK9OkaQB80hckgbMIi5JA2YRl6QBs4hL0oBZxCVpwCzikjRgFnFJ\nGjCLuCQN2P8HEgDoA6g+mhcAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0xc7ab5f8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# distribution of euribor3m for each outcome value: looks like an excellent feature\n",
    "bank.boxplot(by='outcome', column='euribor3m')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 3: Model building\n",
    "\n",
    "- Use cross-validation to evaluate the AUC of a logistic regression model with your chosen features\n",
    "- Try to increase the AUC by selecting different sets of features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([u'age', u'job', u'marital', u'education', u'default', u'housing',\n",
       "       u'loan', u'contact', u'month', u'day_of_week', u'duration', u'campaign',\n",
       "       u'pdays', u'previous', u'poutcome', u'emp.var.rate', u'cons.price.idx',\n",
       "       u'cons.conf.idx', u'euribor3m', u'nr.employed', u'y', u'outcome',\n",
       "       u'job_blue-collar', u'job_entrepreneur', u'job_housemaid',\n",
       "       u'job_management', u'job_retired', u'job_self-employed',\n",
       "       u'job_services', u'job_student', u'job_technician', u'job_unemployed',\n",
       "       u'job_unknown', u'poutcome_nonexistent', u'poutcome_success'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# list every column now in bank (the original columns plus the dummy columns)\n",
    "bank.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# create X (including the 13 dummy columns: 11 for job and 2 for poutcome)\n",
    "# the dummy column names are taken from the dummy DataFrames themselves rather than\n",
    "# from the last 13 positions of bank.columns, so the selection does not depend on column order\n",
    "dummy_feature_cols = list(job_dummies.columns) + list(poutcome_dummies.columns)\n",
    "feature_cols = ['default', 'contact', 'previous', 'euribor3m'] + dummy_feature_cols\n",
    "X = bank[feature_cols]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# create y (the response) from the outcome column\n",
    "y = bank['outcome']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.75566564072331199"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# calculate cross-validated AUC\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "try:\n",
    "    # cross_val_score lives in sklearn.model_selection since scikit-learn 0.18\n",
    "    from sklearn.model_selection import cross_val_score\n",
    "except ImportError:\n",
    "    # older scikit-learn releases only have sklearn.cross_validation (removed in 0.20)\n",
    "    from sklearn.cross_validation import cross_val_score\n",
    "# a very large C makes the regularization penalty negligible\n",
    "logreg = LogisticRegression(C=1e9)\n",
    "# mean ROC AUC over 10 cross-validation folds\n",
    "cross_val_score(logreg, X, y, cv=10, scoring='roc_auc').mean()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
